*****------------------------------------------------******
***** The legacies of authoritarian regimes  ----****
***** Prepare gender data  ------------------------*******


***-----------------------------------------------------------*****
***-----------------------------------------------------------*****
*1. RUNS DIAGNOSTICS FOR MACRO VARIABLES, GENERATES NEW VARIABLES, RECODES, ETC.

* Session setup: empty memory, no paging of output, graph scheme, project root.
clear all
set more off
set scheme plottig   // user-written scheme; presumably from the blindschemes package - confirm it is installed

cd "/Users/aneundorf/Dropbox/Project_Regimes/"


***-----------------------------------------------------------*****
***-----------------------------------------------------------*****
* Macro data
*
* Loads the V-Dem country-year file, keeps 1920 onwards, converts the COW
* country codes into the numeric codes used by the survey data, and saves
* a first version of the macro file.
* The documented option of -use- for discarding the data in memory is clear.

use "Data/Macro/PolRegime/VDem/Country_Year_V-Dem_Extended_STATA_v8/V-Dem-CY+Others-v8.dta", clear


drop if year<1920

**-------
** Change country code to be matched with WVS code

recode COWcode (260=255) // West Germany during separation
recode COWcode (315=316) // Czech Rep before 1992

*browse  country_name COWcode if country_id==157
*browse  country_name COWcode if country_id==150

* kountry is a user-written command; here it converts COW numeric codes
* (cown) to ISO numeric codes (iso3n) and stores them in _ISO3N_.
kountry COWcode, from(cown) to(iso3n)
rename _ISO3N_ wvs_code
label variable wvs_code "World Values Survey country code (ISO3)"
* NOTE(review): countrylab1 is not defined in this file; presumably it is
* defined by Label_Cntry.do, which runs below - confirm.
label values wvs_code countrylab1

* Manual corrections of individual codes after the conversion
*643 Russia
recode wvs_code (810=643)

*Tanzania
recode wvs_code (835=834)

*Taiwan
replace wvs_code=158 if COWcode==713


do "Data/DoFiles/Label_Cntry.do"
* NOTE(review): wvs is either a variable created by Label_Cntry.do or an
* abbreviation of wvs_code; left exactly as written because that do-file
* is not visible from here.
rename wvs country

save "Analysis/Gender/Data/MacroData_Gender.dta", replace



****------------------------------------------
**** Create Modernization Index
*
* Each component is rescaled to the 0-1 range using the minimum and maximum
* over all observations currently in memory, and modern_new is the row mean
* of the rescaled components. The rescaling is done in two steps (shift,
* then divide) with a helper variable that is dropped again.

* All components, in the order used for the descriptive output below
local components e_Vanhanen_literate_ipo e_miurbani e_peinfmor e_pelifeex e_Vanhanen_nonag_ipo ///
    v2peprisch v2pesecsch v2petersch e_miferrat

* Components rescaled so that the observed minimum maps to 0 and the maximum to 1
local keep_direction e_Vanhanen_literate_ipo e_miurbani e_pelifeex e_Vanhanen_nonag_ipo ///
    v2peprisch v2pesecsch v2petersch

* Components rescaled in reverse: the observed maximum maps to 0, the minimum to 1
* (by their V-Dem names presumably infant mortality and fertility rate - confirm)
local flip_direction e_peinfmor e_miferrat

* Descriptives and pairwise correlations of the raw components
* (e_pelifeex is listed a second time in the summary, as in the earlier version)
summarize `components' e_pelifeex

pwcorr `components'

foreach v of local keep_direction {
    summarize `v', detail

    * shift so that the minimum becomes 0
    generate `v'1 = `v' - r(min)
    summarize `v'1, detail
    * divide by the maximum of the shifted variable
    generate `v'_01 = `v'1/r(max)
    summarize `v'_01, detail
    drop `v'1
}



foreach v of local flip_direction {
    summarize `v', detail

    * shift so that the maximum becomes 0 (all values are then <= 0)
    generate `v'1 = `v' - r(max)
    summarize `v'1, detail
    * divide by the (negative) minimum, which reverses the direction
    generate `v'_01 = `v'1/r(min)
    summarize `v'_01, detail
    drop `v'1
}

* Names of the rescaled components, in the same order as the raw list
local scaled
foreach v of local components {
    local scaled `scaled' `v'_01
}

summarize `scaled'
pwcorr `scaled'

* New index: mean of the rescaled components available in each row
capture drop modern_new
egen modern_new = rowmean(`scaled')

* External do-file; the variables modernization and modern_N used further
* down are not created in this file and presumably come from here - confirm.
do "Data/DoFiles/CreateModernisation"

*save "Analysis/Gender/Data/MacroData_Gender.dta", replace

*** Explore coverage of modernization index
bysort country: summarize modernization modern_new v2x_gender

// Author's notes on coverage:
// Countries that are missing modern: Kyrgyzstan, Slovakia, US
// Countries that are missing both: Andorra, Belize, Palestine, Hong Kong, Puerto Rico, Bosnia Federation


** Explore modernization index
tabulate year if modern_new!=.
pwcorr modernization modern_new e_migdppcln v2x_gender // author's note: R=0.68

tabulate modern_N if year>1919

**------------
** Regime variable
* Show the regime variable with and without value labels
tabulate v2x_regime
tabulate v2x_regime, nolabel

* Democracy dummy: categories 0-1 become 0, categories 2-3 become 1
recode v2x_regime (0/1=0 "Autocracy") (2/3=1 "Democracy"), generate(democ_dum)
tabulate v2x_regime democ_dum

* Autocracy dummy: the same split with the coding reversed
recode v2x_regime (0/1=1 "Autocracy") (2/3=0 "Democracy"), generate(autoc_dum)

** Age of democracy
** Requires the user-written btscs add-on for Stata (Beck and Katz).
** It has to be installed manually because it has no automatic install procedure.
* Arguments: event variable, time variable, unit variable; the result is
* stored in age_democarcy (spelling kept because later lines use this name).

btscs autoc_dum year COWcode, g(age_democarcy)
tabulate age_democarcy, missing



*----------------
** Gender variables
*
* Describes the V-Dem gender indices, builds an alternative index from two
* of its components, reduces the data to one row per country and year,
* adds one-year lags and saves the macro file.

local gender_idx v2x_gender v2x_gencl v2x_gencs v2x_genpp
summarize `gender_idx'
pwcorr `gender_idx'

* Alternative index: row mean of v2x_gencs and v2x_genpp only
egen v2x_gender2 = rowmean(v2x_gencs v2x_genpp)
summarize v2x_gender2


*** Reduce dataset
* Means by country and year (mean is the default statistic of collapse)
collapse (mean) COWcode country_id modernization v2x_gender v2x_gender2 v2x_gencl v2x_gencs ///
    v2x_genpp v2lgfemleg democ_dum v2x_polyarchy e_migdppcln e_migdppc ///
    e_Vanhanen_literate_ipo e_miurbani e_peinfmor e_pelifeex e_Vanhanen_nonag_ipo e_mipopula ///
    v2peprisch v2pesecsch v2petersch e_miferrat modern_new age_democarcy modern_N, ///
    by(country year)

tabulate democ_dum
* Averaged dummy values from 0 to 0.4 are set to 0.
* NOTE(review): values strictly between 0.4 and 1 are not touched by this rule.
recode democ_dum (0/0.4=0)


* Declare the panel structure so that the lag operator can be used
tsset country year

* One-year lags; modern_1 has its own name, the others are named <variable>_1
generate modern_1 = L1.modernization
foreach v in v2x_gender v2x_gender2 v2x_gencl v2x_gencs v2x_genpp modern_new e_migdppcln v2lgfemleg {
    generate `v'_1 = L1.`v'
}


label variable modern_1 "Lagged Modernization Index"
label variable modern_new_1 "Lagged new Modernization Index"
label variable v2x_gender_1 "Lagged Women's Pol. Empowerment"
label variable v2x_gender2_1 "Lagged Women's Pol. Empowerment - only pol indicies"


sort COWcode year
save "Analysis/Gender/Data/MacroData_Gender.dta", replace

**--------------
** Add country names from V-Dem data
*
* Writes a small lookup file (COWcode, year, country_name) from the raw
* V-Dem data, merges it onto the macro file, and then keeps only the
* countries flagged in Country_cover.dta.
* -use- is called with its documented option clear.

use "Data/Macro/PolRegime/VDem/Country_Year_V-Dem_Extended_STATA_v8/V-Dem-CY+Others-v8.dta", clear

keep COWcode country_name year
sort COWcode year
save  "Analysis/Gender/Data/V-Dem_cntrynames.dta", replace

use "Analysis/Gender/Data/MacroData_Gender.dta", clear
sort COWcode year

* Old-style merge syntax (both files sorted on the key variables). It is kept
* as is because the uniqueness of COWcode-year in either file cannot be
* confirmed from this script alone.
merge COWcode year using "Analysis/Gender/Data/V-Dem_cntrynames.dta"
drop _merge
* The lookup file was not restricted by year, so rows before 1920 that came
* in through the merge are removed again.
drop if year<1920

* Country_cover.dta carries the flag indata; the commented-out block in the
* micro-data section of this file shows how that file is created.
sort country
merge country using "Analysis/Gender/Data/Country_cover.dta"
drop _merge
drop if indata!=1

sort country year
save "Analysis/Gender/Data/MacroData_Gender.dta", replace

**----------------------------------------------
*** Add country-years for merged countries
* The external do-file Gender_SplitCnty is run on the macro file and the
* result is saved as the long macro file.

use "Analysis/Gender/Data/MacroData_Gender.dta", clear
do "Analysis/Gender/Do-Files/Gender_SplitCnty"

tab country

sort country year
save "Analysis/Gender/Data/MacroData_Gender_long.dta", replace

***********--------------------------------------------
* Create socialization variable to be matched with cohorts
*
* Years are grouped into five-year bins labelled by their first year
* (1900, 1905, ...), the macro variables are averaged within country and
* bin, and the result is saved for merging onto the survey data.
use "Analysis/Gender/Data/MacroData_Gender_long.dta", clear

tab year
gen cohortmatch5_15=.
set more off
foreach num of numlist 1900(5)2017 {
	* years num to num+4 belong to the bin labelled num
	replace cohortmatch5_15 = `num' if inrange(year, `num', `num'+4)
}
* The full variable name is used here and in by() so that the script does
* not depend on variable-name abbreviation being switched on and unambiguous.
tab cohortmatch5_15


collapse modernization modern_N v2x_gender v2x_gender2 v2x_gencl v2x_gencs v2x_genpp v2lgfemleg ///
	e_migdppcln e_pelifeex e_miferrat e_miurbani modern_new, by(country cohortmatch5_15)

* Prefix every averaged variable with coh5_
foreach v in modernization v2x_gender v2x_gender2 v2x_gencl v2x_gencs v2x_genpp v2lgfemleg ///
	e_migdppcln e_pelifeex e_miferrat e_miurbani modern_new modern_N {
	rename `v' coh5_`v'
}


**--------------------------------------------------------------
*** Other socialisation variables
* Copies of the bin variable under the other cohortmatch5_* names
foreach gap in 5 10 20 25 30 {
	gen cohortmatch5_`gap' = cohortmatch5_15
}


sort country cohortmatch5_15
save "Analysis/Gender/Data/MacroData_cohort.dta", replace



***********--------------------------------------------
* Merging macro and micro data
*
* Loads the pooled survey data, prepares the individual-level covariates,
* restricts the sample to complete cases, attaches the country-year and
* country-cohort macro variables, and saves the analysis file.

use "/Users/aneundorf/Dropbox/Legacy of authoritarian regimes data/MergedData/MergedData.dta", clear

keep data year age country women_lead married female religion educ employment educ_yr ///
    cohort5 cohortmatch5_5 cohortmatch5_10 cohortmatch5_15 cohortmatch5_20 cohortmatch5_25 ///
    cohortmatch5_30 birth


*** Prepare survey data
* Distributions and descriptives for respondents with a valid outcome
local covars age married female religion educ employment
tab1 women_lead `covars' if women_lead!=.
summarize women_lead `covars' if women_lead!=.


*** Education
* Codes 0 and 7 to 21 are set to missing in both education variables
recode educ educ_yr (0 7/21 = .)

*drop educ_combi
* Use educ where it is available and fall back to educ_yr otherwise
generate educ_combi = cond(educ==., educ_yr, educ)

tabulate educ_combi, missing
label variable educ_combi "Education: combined categorical and year variables"

*** Employment
*tab employment,m
*tab employment,nolabel

* Code 1 stays 1, code 2 becomes 0; any other code is carried over unchanged
recode employment (1=1)(2=0), generate(working)

***---------------------
*** Reduce size of dataset
tabulate country if women_lead!=.

* Baseline specification, reused below
local base_model women_lead age i.female i.educ_combi i.working i.religion i.data

* Keep only the observations that enter the baseline model
regress `base_model', vce(cluster country)
keep if e(sample)

tabulate women_lead
tabulate country if women_lead!=.
tabulate country, nolabel

recode country (914=70)
/*
* Number of countries

preserve
collapse women_lead, by(country)
sum women_lead
tab country 
gen indata=1

sort country
save  "Analysis/Gender/Data/Country_cover.dta", replace
restore
*/

* Country-year macro variables (old-style merge; both files sorted on the keys)
sort country year
merge country year using "Analysis/Gender/Data/MacroData_Gender.dta"
drop _merge

* Country-cohort macro variables, matched on the cohortmatch5_15 bin
sort country cohortmatch5_15
merge country cohortmatch5_15 using "Analysis/Gender/Data/MacroData_cohort.dta"
drop _merge

regress `base_model', vce(cluster country)

* Final estimation sample: baseline model plus macro variables, age 20 and above
regress `base_model' coh5_modern_new coh5_v2x_gender v2x_gender_1 modern_new_1 if age>=20, vce(cluster country)
keep if e(sample)

save "Analysis/Gender/Data/MergedData_Gender.dta", replace



*** -----------------------------------

*** Compare countries that were included in the analysis to those excluded
*
* Step 1: list the country codes present in the analysis file and flag them.
* Step 2: attach the flag to the long macro file and compare the two groups.
* -use- is called with its documented option clear.


use "Analysis/Gender/Data/MergedData_Gender.dta", clear

gen cnty_incl = 1

* One row per country code, with the flag equal to 1
collapse cnty_incl, by(COWcode)
sort COWcode
save "Analysis/Gender/Data/Countries_Gender.dta", replace

use "Analysis/Gender/Data/MacroData_Gender_long.dta", clear
sort COWcode
* Old-style merge on COWcode (both files sorted on the key)
merge  COWcode using "Analysis/Gender/Data/Countries_Gender.dta"
drop _merge

* Country codes without a match were not part of the analysis
recode cnty_incl (.=0)
bys cnty_incl: sum modernization v2x_gender2 e_mipopula e_migdppc v2x_polyarchy

tab country_name cnty_incl



** 

* Population (e_mipopula) by country code for 1996-2000, together with the
* inclusion flag.
*
* The tabulations are run while the data are still in long format. After
* reshape wide the variables year and e_mipopula no longer exist under those
* names (they become e_mipopula1996 ... e_mipopula2000), and country is not
* among the variables kept here, so the checks have to come first and use
* COWcode as the country identifier.

keep COWcode e_mipopula cnty_incl year
keep if inrange(year, 1996, 2000)

* One row per country code and year (mean of each variable)
collapse (mean) e_mipopula cnty_incl, by(COWcode year)

* Inspect the non-missing population figures in long format.
* The data are already one row per country code and year, so no further
* collapse is needed.
drop if e_mipopula==.
tab year

tab e_mipopula if year==2000

* Wide layout, one row per country code, produced last
reshape wide e_mipopula cnty_incl, i(COWcode) j(year)

